# ---- Packages ----
library(haven)
library(dplyr)
library(tidyr)   # for pivot_wider()
library(knitr)

# ================================
# === Table H1: Month grouping ===
# ================================
# Load the CEP survey microdata (Stata format).
cep_all <- read_dta("Data/cep_all.dta")

# Frequency table of interviews by fieldwork month group.
# `encuesta_m` is the numeric month code: 4 = Apr, 5 = May, ..., 12 = Dec.
# Rows whose month code is outside 4-12 (or missing) are excluded.
H1 <- cep_all %>%
  filter(encuesta_m %in% 4:12) %>%
  mutate(
    month_group = factor(
      case_when(
        encuesta_m %in% 4:6   ~ "April–June",
        encuesta_m %in% 7:9   ~ "July–September",
        encuesta_m %in% 10:12 ~ "October–December"
      ),
      levels = c("April–June", "July–September", "October–December")
    )
  ) %>%
  # count() on a factor returns rows in level order and omits empty levels.
  count(month_group, name = "Freq") %>%
  transmute(
    `Month Group`  = month_group,
    Freq,
    # Shares are stored as ready-to-print strings with a LaTeX-escaped "%".
    Percent        = paste0(round(100 * Freq / sum(Freq), 1), "\\%"),
    `Cumulative %` = paste0(round(cumsum(Freq) / sum(Freq) * 100, 1), "\\%")
  )

# LaTeX output for H1.
# The Percent and Cumulative % cells of H1 already carry a hand-escaped "\%".
# kable() escapes LaTeX special characters by default, which would escape the
# backslash a second time and render the cells as "\textbackslash{}\%".
# Escaping is therefore switched off here; as a consequence the "%" in the
# column header has to be escaped by hand as well.
kable(H1,
      format    = "latex",
      booktabs  = TRUE,
      escape    = FALSE,
      caption   = "CEP Survey Month Fieldwork",
      col.names = c("Month Group", "Freq", "Percent", "Cumulative \\%"))

# ================================
# === Table H2: dob x dob2     ===
# ================================
# Read only the four WVS variables needed for this table.
wvs <- read_dta(
  "Data/WVS_Time_Series_1981-2022_stata_v5_0.dta",
  encoding   = "latin1",                    # try "UTF-8" if needed
  col_select = c("S003", "S020", "X003", "X002")
)

# Keep one country (S003 == 152; presumably Chile -- confirm against the WVS
# codebook) and derive plain numeric working variables:
#   year_survey = S020, age = X003, dob = X002 (reported year of birth),
#   dob2        = year of birth implied by survey year minus age.
wvs <- wvs %>%
  filter(S003 == 152) %>%
  transmute(
    year_survey = as.numeric(S020),
    age         = as.numeric(X003),
    dob         = as.numeric(X002),
    dob2        = year_survey - age
  )

# Cross-tabulate reported birth year (dob, rows) against the birth year implied
# by survey year minus age (dob2, columns), for respondents reporting a birth
# year in 1953-1957 and an implied birth year before 1958.
H2 <- wvs %>%
  filter(dob >= 1953, dob <= 1957, dob2 < 1958) %>%
  count(dob, dob2) %>%
  # pivot_wider() creates the new columns in order of first appearance, so the
  # rows are sorted by dob2 first; otherwise a dob2 year that only occurs for a
  # later dob would be appended after larger years and the columns would not be
  # in ascending order.
  arrange(dob2) %>%
  pivot_wider(names_from = dob2, values_from = n, values_fill = 0) %>%
  arrange(dob) %>%
  mutate(dob = as.character(dob))  # character, so a "Total" label row can be bound to it

# Add row and column totals to the dob x dob2 cross-tab.
# Every column other than the `dob` label column holds counts.
num_cols0 <- names(H2)[names(H2) != "dob"]

# Column sums are taken over the count columns only, before/independently of
# the row-total column, so the grand total is not double counted.
col_totals <- colSums(H2[num_cols0], na.rm = TRUE)
H2$Total <- rowSums(H2[num_cols0], na.rm = TRUE)

# One-row frame holding the column sums plus the grand total;
# check.names = FALSE keeps the year column names (e.g. "1953") unmangled.
H2_totals_row <- data.frame(
  dob = "Total",
  t(col_totals),
  Total = sum(col_totals),
  check.names = FALSE
)

H2_final <- H2 %>% bind_rows(H2_totals_row)

# Render the H2 cross-tab (including totals) as a booktabs LaTeX table.
H2_final %>%
  kable(
    format   = "latex",
    booktabs = TRUE,
    caption  = "Distribution of Individuals born 1953–1957 Across Assigned Year of Birth. Source: WVS"
  )
